{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "<h1> Create Keras DNN model </h1>\n",
    "\n",
    "<h2>Learning Objectives</h2>\n",
    "<ol>\n",
    "<li>Use the tf.data API to create our ML datasets</li>\n",
    "<li>Use the Keras functional API to create our DNN architecture</li>\n",
    "<li>Deploy a version of the model on Cloud AI Platform</li>\n",
    "</ol>\n",
    "\n",
    "Note: This notebook requires TensorFlow 2.1 as we are creating a model using Keras. \n",
    "\n",
    "__TODO__: Complete the lab notebook #TODO sections. You can refer to the [../solutions/3_keras_dnn.ipynb](solutions/) notebook for reference. \n"
   ]
  },
  {

"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ensure the right version of Tensorflow is installed.\n",
"!pip freeze | grep tensorflow==2.1"
]
},

  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# change these to try this notebook out\n",
    "BUCKET = 'cloud-training-demos-ml'\n",
    "PROJECT = 'cloud-training-demos'\n",
    "REGION = 'us-east1' #'us-central1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['BUCKET'] = BUCKET\n",
    "os.environ['PROJECT'] = PROJECT\n",
    "os.environ['REGION'] = REGION"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "if ! gcloud storage ls | grep -q gs://${BUCKET}/; then\n",
    "  gcloud storage buckets create --location ${REGION} gs://${BUCKET}\n",
    "fi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "ls *.csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Create Keras model\n",
    "<p>\n",
    "First, write an input_fn to read the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "print(tf.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# Determine CSV, label, and key columns\n",
    "CSV_COLUMNS = 'weight_pounds,is_male,mother_age,plurality,gestation_weeks,key'.split(',')\n",
    "LABEL_COLUMN = 'weight_pounds'\n",
    "KEY_COLUMN = 'key'\n",
    "\n",
    "# Set default values for each CSV column. Treat is_male and plurality as strings.\n",
    "DEFAULTS = [[0.0], ['null'], [0.0], ['null'], [0.0], ['nokey']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the training data\n",
    "# use the tf.data API to create and load a dataset. Be sure to map features and labels.\n",
    "# https://www.tensorflow.org/api_docs/python/tf/data/experimental\n",
    "def load_dataset(pattern, batch_size=1, mode=tf.estimator.ModeKeys.EVAL):\n",
    "  dataset = (\n",
    "              # TODO create the dataset\n",
    "              )\n",
    " \n",
    "  if mode == tf.estimator.ModeKeys.TRAIN:\n",
    "        # TODO add shuffling to the dataset if it's in training mode:\n",
    "        dataset = dataset\n",
    "  dataset = dataset.prefetch(1) # take advantage of multi-threading; 1=AUTOTUNE\n",
    "  return dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Next, define the feature columns. mother_age and gestation_weeks should be numeric.\n",
    "The others (is_male, plurality) should be categorical."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Build a simple Keras DNN using its Functional API\n",
    "def rmse(y_true, y_pred):\n",
    "    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true))) \n",
    "\n",
    "# Helper function to handle categorical columns\n",
    "def categorical_fc(name, values):\n",
    "  return tf.feature_column.indicator_column(\n",
    "    tf.feature_column.categorical_column_with_vocabulary_list(name, values))\n",
    "\n",
    "def build_dnn_model():\n",
    "    # input layer\n",
    "    inputs = {\n",
    "        colname : tf.keras.layers.Input(name=colname, shape=(), dtype='float32')\n",
    "           for colname in [] # TODO complete array of numeric input columns\n",
    "    }\n",
    "    inputs.update({\n",
    "        colname : tf.keras.layers.Input(name=colname, shape=(), dtype='string')\n",
    "            for colname in [] # TODO complete array of string input columns   \n",
    "    })\n",
    "\n",
    "    # feature columns from inputs\n",
    "    feature_columns = {\n",
    "        colname : tf.feature_column.numeric_column(colname)\n",
    "            for colname in [] # TODO complete array of numeric feature columns\n",
    "    }\n",
    "    if False:\n",
    "        # Until TF-serving supports 2.0, so as to get servable model\n",
    "        feature_columns['is_male'] = categorical_fc('is_male', ['True', 'False', 'Unknown'])\n",
    "        feature_columns['plurality'] = categorical_fc('plurality',\n",
    "                          ['Single(1)', 'Twins(2)', 'Triplets(3)',\n",
    "                           'Quadruplets(4)', 'Quintuplets(5)','Multiple(2+)'])\n",
    "\n",
    "    # TODO: Use the feature columns and inputs above to create a DNN of [64, 32]\n",
    "\n",
    "    \n",
    "\n",
    "    model = tf.keras.models.Model(inputs, output)\n",
    "    model.compile(optimizer='adam', loss='mse', metrics=[rmse, 'mse'])    \n",
    "    return model\n",
    "\n",
    "print(\"Here is our DNN architecture so far:\\n\")\n",
    "\n",
    "# note how to use strategy to do distributed training\n",
    "strategy = tf.distribute.MirroredStrategy()\n",
    "with strategy.scope():\n",
    "    model = build_dnn_model()\n",
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can visualize the DNN using the Keras plot_model utility."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.keras.utils.plot_model(model, 'dnn_model.png', show_shapes=False, rankdir='LR')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Train and evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "TRAIN_BATCH_SIZE = 32\n",
    "NUM_TRAIN_EXAMPLES = 10000 * 5 # training dataset repeats, so it will wrap around\n",
    "NUM_EVALS = 5  # how many times to evaluate\n",
    "NUM_EVAL_EXAMPLES = 10000 # enough to get a reasonable sample, but not so much that it slows down\n",
    "\n",
    "trainds = load_dataset('train*', TRAIN_BATCH_SIZE, tf.estimator.ModeKeys.TRAIN)\n",
    "evalds = load_dataset('eval*', 1000, tf.estimator.ModeKeys.EVAL).take(NUM_EVAL_EXAMPLES//1000)\n",
    "\n",
    "steps_per_epoch = NUM_TRAIN_EXAMPLES // (TRAIN_BATCH_SIZE * NUM_EVALS)\n",
    "\n",
    "history = model.fit(trainds, \n",
    "                    validation_data=evalds,\n",
    "                    epochs=NUM_EVALS, \n",
    "                    steps_per_epoch=steps_per_epoch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Visualize loss curve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot\n",
    "import matplotlib.pyplot as plt\n",
    "nrows = 1\n",
    "ncols = 2\n",
    "fig = plt.figure(figsize=(10, 5))\n",
    "\n",
    "for idx, key in enumerate(['loss', 'rmse']):\n",
    "    ax = fig.add_subplot(nrows, ncols, idx+1)\n",
    "    plt.plot(history.history[key])\n",
    "    plt.plot(history.history['val_{}'.format(key)])\n",
    "    plt.title('model {}'.format(key))\n",
    "    plt.ylabel(key)\n",
    "    plt.xlabel('epoch')\n",
    "    plt.legend(['train', 'validation'], loc='upper left');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save the model\n",
    "\n",
    "Let's wrap the model so that we can supply keyed predictions, and get the key back in our output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serving function that passes through keys\n",
    "@tf.function(input_signature=[{\n",
    "      'is_male': tf.TensorSpec([None,], dtype=tf.string, name='is_male'),\n",
    "      'mother_age': tf.TensorSpec([None,], dtype=tf.float32, name='mother_age'),\n",
    "      'plurality': tf.TensorSpec([None,], dtype=tf.string, name='plurality'),\n",
    "      'gestation_weeks': tf.TensorSpec([None,], dtype=tf.float32, name='gestation_weeks'),\n",
    "      'key': tf.TensorSpec([None,], dtype=tf.string, name='key')\n",
    "}])\n",
    "def my_serve(inputs):\n",
    "    feats = inputs.copy()\n",
    "    key = feats.pop('key')\n",
    "    output = model(feats)\n",
    "    return {'key': key, 'babyweight': output}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import shutil, os, datetime\n",
    "OUTPUT_DIR = './export/babyweight'\n",
    "shutil.rmtree(OUTPUT_DIR, ignore_errors=True)\n",
    "EXPORT_PATH = os.path.join(OUTPUT_DIR, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))\n",
    "tf.saved_model.save(model, EXPORT_PATH, signatures={'serving_default': my_serve})\n",
    "print(\"Exported trained model to {}\".format(EXPORT_PATH))\n",
    "os.environ['EXPORT_PATH'] = EXPORT_PATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!find $EXPORT_PATH"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deploy trained model to Cloud AI Platform\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!saved_model_cli show --tag_set serve --signature_def serving_default --dir {EXPORT_PATH}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "MODEL_NAME=\"babyweight\"\n",
    "VERSION_NAME=\"dnn\"\n",
    "MODEL_LOCATION=$EXPORT_PATH\n",
    "echo \"Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes\"\n",
    "\n",
    "if [[ $(gcloud ai-platform models list --format='value(name)' | grep $MODEL_NAME) ]]; then\n",
    "    echo \"The model named $MODEL_NAME already exists.\"\n",
    "else\n",
    "    # create model\n",
    "    echo \"Creating $MODEL_NAME model now.\"\n",
    "    gcloud ai-platform models create --regions=$REGION $MODEL_NAME\n",
    "fi\n",
    "\n",
    "if [[ $(gcloud ai-platform versions list --model $MODEL_NAME --format='value(name)' | grep $VERSION_NAME) ]]; then\n",
    "    echo \"Deleting already the existing model $MODEL_NAME:$VERSION_NAME ... \"\n",
    "    gcloud ai-platform versions delete --model=$MODEL_NAME $VERSION_NAME\n",
    "    echo \"Please run this cell again if you don't see a Creating message ... \"\n",
    "    sleep 2\n",
    "fi\n",
    "\n",
    "# TODO create model on Cloud AI Platform. Use python-version 3.5 and runtime-version 1.14\n",
    "# https://cloud.google.com/sdk/gcloud/reference/ai-platform/versions/create\n",
    "echo \"Creating $MODEL_NAME:$VERSION_NAME\"\n",
    "gcloud ai-platform versions create # TODO complete the statement"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Monitor the model creation at [GCP Console > AI Platform](https://console.cloud.google.com/mlengine/models/taxifare/) and once the model version `dnn` is created, proceed to the next cell.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile input.json\n",
    "{\"key\": \"b1\", \"is_male\": \"True\", \"mother_age\": 26.0, \"plurality\": \"Single(1)\", \"gestation_weeks\": 39}\n",
    "{\"key\": \"b2\", \"is_male\": \"True\", \"mother_age\": 33.0, \"plurality\": \"Single(1)\", \"gestation_weeks\": 41}\n",
    "{\"key\": \"g1\", \"is_male\": \"False\", \"mother_age\": 26.0, \"plurality\": \"Single(1)\", \"gestation_weeks\": 39}\n",
    "{\"key\": \"g2\", \"is_male\": \"False\", \"mother_age\": 33.0, \"plurality\": \"Single(1)\", \"gestation_weeks\": 41}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!gcloud ai-platform predict --model babyweight --json-instances input.json --version dnn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## main.py\n",
    "\n",
    "This is the code that exists in [serving/application/main.py](serving/application/main.py), i.e. the code in the web application that accesses the ML API."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from oauth2client.client import GoogleCredentials\n",
    "from googleapiclient import discovery\n",
    "\n",
    "credentials = GoogleCredentials.get_application_default()\n",
    "api = discovery.build('ml', 'v1', credentials=credentials)\n",
    "project = PROJECT\n",
    "model_name = 'babyweight'\n",
    "version_name = 'dnn'\n",
    "\n",
    "input_data = {\n",
    "  'instances': [\n",
    "    {\n",
    "      'key': 'b1',\n",
    "      'is_male': 'True',\n",
    "      'mother_age': 26.0,\n",
    "      'plurality': 'Single(1)',\n",
    "      'gestation_weeks': 39\n",
    "    },\n",
    "    {\n",
    "      'key': 'g1',\n",
    "      'is_male': 'False',\n",
    "      'mother_age': 29.0,\n",
    "      'plurality': 'Single(1)',\n",
    "      'gestation_weeks': 38\n",
    "    },\n",
    "    {\n",
    "      'key': 'b2',\n",
    "      'is_male': 'True',\n",
    "      'mother_age': 26.0,\n",
    "      'plurality': 'Triplets(3)',\n",
    "      'gestation_weeks': 39\n",
    "    },\n",
    "    {\n",
    "      'key': 'u1',\n",
    "      'is_male': 'Unknown',\n",
    "      'mother_age': 29.0,\n",
    "      'plurality': 'Multiple(2+)',\n",
    "      'gestation_weeks': 38\n",
    "    },\n",
    "  ]\n",
    "}\n",
    "\n",
    "parent = 'projects/%s/models/%s/versions/%s' % (project, model_name, version_name)\n",
    "prediction = api.projects().predict(body=input_data, name=parent).execute()\n",
    "print(prediction)\n",
    "print(prediction['predictions'][0]['babyweight'][0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
